full_join(occ_rates_min, by = c("soc_minor", "gender")) %>%
full_join(education_long, by = c("id","wave")) %>%
full_join(region_long, by = c("id","wave")) %>%
full_join(subjClass_long, by = c("id","wave")) %>%
full_join(income_long, by = c("id","wave")) %>%
full_join(housing_long, by = c("id","wave"))
bes_reduced <- bes_reduced %>%
mutate(occ_unemp_1dig = case_when(wave %in% 20 ~ our_aprjun20,
wave %in% c(17,18,19) ~ our_octdec19,
wave %in% c(15,16) ~ our_janmar19,
wave %in% 14 ~ our_aprjun18),
occ_unemp_3dig = case_when(wave %in% 20 ~ our_aprjun20_3dig,
wave %in% c(17,18,19) ~ our_octdec19_3dig,
wave %in% c(15,16) ~ our_janmar19_3dig,
wave %in% 14 ~ our_aprjun18_3dig))
bes_reduced <- bes_reduced %>%
mutate(wave = as.numeric(wave),
pandemic = ifelse(wave > 19, 1, 0),
OccHumanProx = proximity * pandemic) %>%
filter(wave >= 14 & in_wave == 1,
age > 15 & age <= 65)
save(bes_reduced, file = "../working/bes_reduced.Rdata")
save(bes_reduced, file = "working/bes_reduced.Rdata")
save(bes_reduced, file = "/working/bes_reduced.Rdata")
getwd()
dir.create("working")
setwd("~/Dropbox/Academic/projects/covid/CovidRisk/replication")
dir.create("working")
dir.create("working")
source("01_prep_bes.R")
save(bes_reduced, file = "/working/bes_reduced.Rdata")
save(bes_reduced, file = "working/bes_reduced.Rdata")
rm(list = ls())
## Load libraries
library(plm)
library(texreg)
library(tidyverse)
library(broom)
library(margins)
rm(list = ls())
## Load libraries
library(plm)
library(texreg)
library(tidyverse)
library(broom)
library(margins)
sessionInfo()
rm(list = ls())
## Load libraries
library(plm) # v.2.6-1
library(texreg) # v.1.38.6
library(tidyverse) # v.1.3.2
library(broom) # v.0.8.0
library(margins) # v.0.3.26
load("data/bes_reduced.Rdata")
bes_reduced$OccHumanProx <- as.numeric(scale(bes_reduced$OccHumanProx))
load("working/bes_reduced.Rdata")
bes_reduced$OccHumanProx <- as.numeric(scale(bes_reduced$OccHumanProx))
bes_reduced$OccHumanProx_binned <- factor(as.numeric(bes_reduced$proximity_binned) * bes_reduced$pandemic + 1, labels = c("low", "low", "mid", "high"))
bes_reduced %>%
filter(wave %in% c(14,15,18,20)) %>%
group_by(wave) %>%
summarise(rho = cor(redistSelf, taxSpendSelf, use=  "complete.obs"))
analysis_func <- function(var, occ_unemployed_digit = 1){
if(occ_unemployed_digit == 1) bes_reduced$occ_unemp <- bes_reduced$occ_unemp_1dig
if(occ_unemployed_digit == 3) bes_reduced$occ_unemp <- bes_reduced$occ_unemp_3dig
print(var)
bes_reduced$outcome_scale <- as.numeric(scale(bes_reduced[[var]]))
## Model 0.1
plm_out_0.1 <- lm(outcome_scale ~ proximity_binned + unemployed + occ_unemp + as.factor(wave) - 1,
data = bes_reduced)
plm_out_0.1 %>% tidy(conf.int = TRUE) %>%
mutate(term = factor(gsub("proximity_binned","Proximity to others: ", term),
levels = c("Proximity to others: low",
"Proximity to others: mid",
"Proximity to others: high"))) %>%
filter(grepl("Proximity", term)) %>%
ggplot(aes(x = estimate, y = term, xmin = conf.low, xmax = conf.high)) + geom_pointrange() +
ylab("") +
xlab("Pooled estimate (no fixed effects)") +
theme_bw() +
ggtitle(var)
if(occ_unemployed_digit == 1) ggsave(paste0("../output/proximity_",var,"_pooled.png"), width = 7, height = 5)
## Model 1
plm_out_1 <- plm(outcome_scale ~ OccHumanProx + unemployed + occ_unemp + as.factor(wave),
effect = "individual",
index = c("id"),
model = "within",
data = bes_reduced)
vcov_1 <- vcov(plm_out_1)
mfx_1 <- tidy(plm_out_1, conf.int = TRUE) %>% filter(term == "OccHumanProx")
mfx_1$model <- 1
## Model 2
plm_out_2 <- plm(outcome_scale ~ OccHumanProx * workHomeCoronaSelf_binary + unemployed + occ_unemp + as.factor(wave),
effect = "individual",
index = c("id"),
model = "within",
data = bes_reduced)
vcov_2 <- vcov(plm_out_2)
mfx_2_pe <- c(coef(plm_out_2)["OccHumanProx"],
coef(plm_out_2)["OccHumanProx"] + coef(plm_out_2)["OccHumanProx:workHomeCoronaSelf_binary"])
names(mfx_2_pe) <- c("OccHumanProx * WorkHome:No", "OccHumanProx * WorkHome:Yes")
mfx_2_se <- c(sqrt(diag(vcov_2))["OccHumanProx"],
sqrt(diag(vcov_2)["OccHumanProx"] + diag(vcov_2)["OccHumanProx:workHomeCoronaSelf_binary"] + (2*vcov_2["OccHumanProx","OccHumanProx:workHomeCoronaSelf_binary"])))
names(mfx_2_se) <- names(mfx_2_pe)
mfx_2 <- tibble(term = names(mfx_2_se),estimate = mfx_2_pe, std.error = mfx_2_se, conf.low = mfx_2_pe - 1.96*mfx_2_se, conf.high = mfx_2_pe + 1.96*mfx_2_se)
mfx_2$model <- 2
## Model 3
plm_out_3 <- plm(outcome_scale ~ factor(OccHumanProx_binned) + unemployed + occ_unemp + as.factor(wave),
effect = "individual",
index = c("id"),
model = "within",
data = bes_reduced)
vcov_3 <- vcov(plm_out_3)
mfx_3 <- tidy(plm_out_3, conf.int = TRUE) %>%
filter(grepl("OccHumanProx", term)) %>%
mutate(term = c("OccHumanProx:Mid", "OccHumanProx:High"))
mfx_3$model <- 3
## Model 4
plm_out_4 <- plm(outcome_scale ~ factor(OccHumanProx_binned) * workHomeCoronaSelf_binary + unemployed + occ_unemp + as.factor(wave),
effect = "individual",
index = c("id"),
model = "within",
data = bes_reduced)
vcov_4 <- vcov(plm_out_4)
mfx_4_pe <- c(coef(plm_out_4)["factor(OccHumanProx_binned)mid"],
coef(plm_out_4)["factor(OccHumanProx_binned)mid"] + coef(plm_out_4)["factor(OccHumanProx_binned)mid:workHomeCoronaSelf_binary"],
coef(plm_out_4)["factor(OccHumanProx_binned)high"],
coef(plm_out_4)["factor(OccHumanProx_binned)high"] + coef(plm_out_4)["factor(OccHumanProx_binned)high:workHomeCoronaSelf_binary"])
names(mfx_4_pe) <- c("OccHumanProx:Mid * WorkHome:No", "OccHumanProx:Mid * WorkHome:Yes",
"OccHumanProx:High * WorkHome:No", "OccHumanProx:High * WorkHome:Yes")
mfx_4_se <- c(sqrt(diag(vcov_4))["factor(OccHumanProx_binned)mid"],
sqrt(diag(vcov_4)["factor(OccHumanProx_binned)mid"] + diag(vcov_4)["factor(OccHumanProx_binned)mid:workHomeCoronaSelf_binary"] + (2*vcov_4["factor(OccHumanProx_binned)mid","factor(OccHumanProx_binned)mid:workHomeCoronaSelf_binary"])),
sqrt(diag(vcov_4))["factor(OccHumanProx_binned)high"],
sqrt(diag(vcov_4)["factor(OccHumanProx_binned)high"] + diag(vcov_4)["factor(OccHumanProx_binned)high:workHomeCoronaSelf_binary"] + (2*vcov_4["factor(OccHumanProx_binned)high","factor(OccHumanProx_binned)high:workHomeCoronaSelf_binary"]))
)
names(mfx_4_se)  <- names(mfx_4_pe)
mfx_4 <- tibble(term = names(mfx_4_se),estimate = mfx_4_pe, std.error = mfx_4_se, conf.low = mfx_4_pe - 1.96*mfx_4_se, conf.high = mfx_4_pe + 1.96*mfx_4_se)
mfx_4$model <- 4
mfx <- bind_rows(mfx_1, mfx_2, mfx_3, mfx_4)
mfx <- mfx %>%
mutate(term = gsub("OccHumanProx","OccProxRisk",term))
mfx <- mfx %>%
mutate(term = gsub("\\*", "x",term),
term = factor(term, levels = c("OccProxRisk",
"OccProxRisk x WorkHome:No",
"OccProxRisk x WorkHome:Yes",
"OccProxRisk:Mid",
"OccProxRisk:High",
"OccProxRisk:Mid x WorkHome:No",
"OccProxRisk:Mid x WorkHome:Yes",
"OccProxRisk:High x WorkHome:No",
"OccProxRisk:High x WorkHome:Yes")),
model = paste0("Model ", model))
mfx %>%
ggplot(aes(x = estimate, y = term, group = model, xmin = conf.low, xmax = conf.high)) + geom_pointrange() +
facet_wrap(~model, scales = "free_y", ncol = 2) +
theme_bw() +
geom_vline(xintercept = 0, linetype = 2) +
ggtitle(paste0("Treatment effects: ", var)) +
xlab("Estimate") + ylab("")
if(occ_unemployed_digit == 1) ggsave(paste0("../output/treatment_effects_",var,".png"), width = 10, height = 5)
mfx %>%
filter(model != "Model 3") %>%
mutate(model = gsub("Model 4", "Model 3", model)) %>%
ggplot(aes(x = estimate, y = term, group = model, xmin = conf.low, xmax = conf.high)) + geom_pointrange() +
facet_wrap(~model, scales = "free_y", ncol = 3) +
theme_bw() +
geom_vline(xintercept = 0, linetype = 2) +
ggtitle(paste0("Treatment effects: ", var)) +
xlab("Estimate") + ylab("")
if(occ_unemployed_digit == 1) ggsave(paste0("../output/treatment_effects_paper_",var,".png"), width = 10, height = 3)
sink(paste0("../output/",var,"_fe_models.txt"))
cat(screenreg(list(plm_out_1, plm_out_2, plm_out_3, plm_out_4)))
sink()
plm_out_leads <- plm(outcome_scale ~ proximity * as.factor(wave) + unemployed + occ_unemp,
effect = "individual",
index = c("id"),
model = "within",
data = bes_reduced)
tidy(plm_out_leads, conf.int = TRUE) %>%
filter(grepl("proximity", term)) %>%
mutate(term = as.numeric(gsub("\\D+","", term))) %>%
ggplot(aes(x = term, y = estimate, ymin = conf.low, ymax = conf.high)) +
geom_pointrange() +
theme_bw() +
xlab("Survey wave") +
ylab("Marginal effect of occupational proximity risk") +
geom_vline(xintercept = 19.5, linetype = 2) +
geom_hline(yintercept = 0, linetype = 3) +
ggtitle(paste0("Parallel trends: ",var))
if(occ_unemployed_digit == 1) ggsave(paste0("../output/parallel_trends_",var,".png"), width = 8, height = 4)
bes_reduced_tmp <- bes_reduced %>%
mutate(outcome_scale = as.numeric((get(var)))) %>%
mutate(date = case_when(wave == 14 ~ "2018-05-14",
wave == 15 ~ "2019-03-11",
wave == 16 ~ "2019-05-24",
wave == 17 ~ "2019-11-01",
wave == 18 ~ "2019-11-13",
wave == 19 ~ "2019-12-13",
wave == 20 ~ "2020-06-03"),
date = as.Date(date))
ylab_title <- var
bes_reduced_tmp %>%
group_by(proximity_binned, date) %>%
summarise(taxSpendSelf_est = mean(outcome_scale, na.rm = TRUE),
taxSpendSelf_lo = ifelse(sum(!is.na(outcome_scale)) > 0, t.test(outcome_scale)$conf[1], NA),
taxSpendSelf_high = ifelse(sum(!is.na(outcome_scale)) > 0, t.test(outcome_scale)$conf[2], NA)) %>%
filter(!is.na(proximity_binned) & !is.na(taxSpendSelf_est)) %>%
ungroup() %>%
ggplot(aes(x = date, y = taxSpendSelf_est, ymin = taxSpendSelf_lo, ymax = taxSpendSelf_high, col = proximity_binned)) +
geom_pointrange(position = position_dodge(.1)) +
geom_line() +
theme_bw() +
theme(legend.position="bottom") +
scale_color_manual("OccProximityRisk", values = c("black", "red", "blue"), labels = c("Low", "Mid", "High")) +
ylab(ylab_title) + xlab("Survey date") +
scale_x_date(breaks = as.Date(c(unique(bes_reduced_tmp$date[!is.na(bes_reduced_tmp[[var]])]),"2020-03-26")), date_labels = "%b-%Y") +
geom_vline(xintercept = as.Date("2020-03-26"), linetype = 2)
ggsave(paste0("../output/parallel_trends_2_",var,".png"), width = 9, height = 4)
out <- list(plm_out_0.1, plm_out_1, plm_out_2, plm_out_3, plm_out_4)
return(out)
}
dvs <- c("redistSelf", "taxSpendSelf", "riskUnemployment", "riskPoverty")
models_out_3dig <- lapply(dvs, function(x) analysis_func(x, occ_unemployed_digit = 3))
models_out_1dig <- lapply(dvs, function(x) analysis_func(x, occ_unemployed_digit = 1))
names(models_out_3dig) <- names(models_out_1dig) <- dvs
library(stargazer)
sink("output/taxSpendSelf_regressions.tex")
stargazer(models_out_1dig$taxSpendSelf[c(2,3,5)],
keep = c("OccHumanProx","unemployed","occ_unemp"),
keep.stat = c("n"),
no.space = TRUE,
covariate.labels = c("OccProximityRisk",
"OccProximityRisk: Mid",
"OccProximityRisk: High",
"Unemployed",
"Occ. unemployment rate",
"OccProximityRisk * workHome",
"OccProximityRisk: Mid * workHome",
"OccProximityRisk: High * workHome"
),
float = FALSE,
dep.var.caption = "",
dep.var.labels = "taxSpendSelf",
add.lines = list(c("Individual FEs", "Yes", "Yes", "Yes"),
c("Wave FEs", "Yes", "Yes", "Yes")))
sink()
sink("output/redistSelf_regressions.tex")
stargazer(models_out_1dig$redistSelf[c(2,3,5)],
keep = c("OccHumanProx","unemployed","occ_unemp"),
keep.stat = c("n"),
no.space = TRUE,
covariate.labels = c("OccProximityRisk",
"OccProximityRisk: Mid",
"OccProximityRisk: High",
"Unemployed",
"Occ. unemployment rate",
"OccProximityRisk * workHome",
"OccProximityRisk: Mid * workHome",
"OccProximityRisk: High * workHome"
),
float = FALSE,
dep.var.caption = "",
dep.var.labels = "redistSelf",
add.lines = list(c("Individual FEs", "Yes", "Yes", "Yes"),
c("Wave FEs", "Yes", "Yes", "Yes")))
sink()
sink("output/taxSpendSelf_regressions_3dig.tex")
stargazer(models_out_3dig$taxSpendSelf[c(2,3,5)],
keep = c("OccHumanProx","unemployed","occ_unemp"),
keep.stat = c("n"),
no.space = TRUE,
covariate.labels = c("OccProximityRisk",
"OccProximityRisk: Mid",
"OccProximityRisk: High",
"Unemployed",
"Occ. unemployment rate",
"OccProximityRisk * workHome",
"OccProximityRisk: Mid * workHome",
"OccProximityRisk: High * workHome"
),
float = FALSE,
dep.var.caption = "",
dep.var.labels = "taxSpendSelf",
add.lines = list(c("Individual FEs", "Yes", "Yes", "Yes"),
c("Wave FEs", "Yes", "Yes", "Yes")))
sink()
sink("output/redistSelf_regressions_3dig.tex")
stargazer(models_out_3dig$redistSelf[c(2,3,5)],
keep = c("OccHumanProx","unemployed","occ_unemp"),
keep.stat = c("n"),
no.space = TRUE,
covariate.labels = c("OccProximityRisk",
"OccProximityRisk: Mid",
"OccProximityRisk: High",
"Unemployed",
"Occ. unemployment rate",
"OccProximityRisk * workHome",
"OccProximityRisk: Mid * workHome",
"OccProximityRisk: High * workHome"
),
float = FALSE,
dep.var.caption = "",
dep.var.labels = "redistSelf",
add.lines = list(c("Individual FEs", "Yes", "Yes", "Yes"),
c("Wave FEs", "Yes", "Yes", "Yes")))
sink()
cov_attitudes <- c("worryCoronaHealth", "dutyCorona", "coronaDied", "hadCovidSelf", "cvFreedomSelf", "cvEconSelf")
lapply(cov_attitudes, function(x) {
bes_reduced_tmp <- bes_reduced[bes_reduced$wave == 20,]
bes_reduced_tmp$outcome_tmp <- as.numeric(scale(bes_reduced_tmp[[x]]))
tmp <- tidy(lm(outcome_tmp ~ scale(OccHumanProx), bes_reduced_tmp), conf.int = TRUE)
tmp$outcome <- x
tmp$controls <- FALSE
tmp2 <- tidy(lm(outcome_tmp ~ scale(OccHumanProx) + educ + region + income + age + I(age^2) + housing, bes_reduced_tmp), conf.int = TRUE)
tmp2$outcome <- x
tmp2$controls <- TRUE
rbind(tmp, tmp2)
}) %>%
bind_rows() %>%
filter(grepl("OccHumanProx",term)) %>%
mutate(controls = ifelse(controls, "...with controls", "...without controls")) %>%
mutate(outcome = case_when(outcome == "worryCoronaHealth" ~ "Worried about catching COVID",
outcome == "hadCovidSelf" ~ "Reports having had COVID",
outcome == "dutyCorona" ~ "There is a duty to follow COVID rules",
outcome == "cvFreedomSelf" ~ "Restrict personal freedoms to fight infections",
outcome == "cvEconSelf" ~ "Reduce infections even if it damages the economy",
outcome == "coronaDied" ~ "Personally knows someone who died from COVID"),
outcome = fct_reorder(outcome, estimate)) %>%
ggplot(aes(x = estimate, y = outcome, xmin = conf.low, xmax = conf.high, col = controls, group = controls)) + geom_pointrange(position = position_dodge(width = .2)) +
#facet_wrap(~outcome, ncol = 4) +
theme_bw() +
geom_vline(xintercept = 0, linetype = 2) +
xlab("Estimate") +
ylab("") +
ggtitle("Proximity risk and COVID experiences/attitudes") +
scale_color_manual("Effect of proximity risk...", values= c("black", "gray"))
ggsave("output/proximity_subjective_validation.png", width = 8, height = 4.5)
exposure <- read_csv("data/occupations_exposure_to_disease_data.csv") %>%
transmute(soc2010 = `UK SOC 2010 Code`,
proximity = `Proximity to others`,
proximity_binned = factor(cut(proximity, 3), labels = c("low","mid","high")))
death_rates <- readxl::read_excel("data/covid_death_rates.xlsx", sheet = 11, skip = 16) %>%
filter(!is.na(`Deaths involving COVID-19...4`)) %>%
mutate(total_covid_deaths = `Deaths involving COVID-19...4` + `Deaths involving COVID-19...8`,
total_deaths = `Average all cause mortality (2015 to 2019)...6` + `Average all cause mortality (2015 to 2019)...10`,#`All causes of death...5` + `All causes of death...9`,
covid_death_rate = total_covid_deaths/total_deaths,
soc2010 = `...1`) %>%
select(soc2010, total_covid_deaths, total_deaths, covid_death_rate)
death_rates <- full_join(exposure, death_rates)
death_rates <- death_rates %>% filter(covid_death_rate != Inf)
summary(lm(covid_death_rate ~ proximity, death_rates, weights = total_deaths))
summary(lm(covid_death_rate ~ proximity_binned, death_rates, weights = total_deaths))
pred_deaths <- predict(lm(covid_death_rate ~ proximity, death_rates, weights = total_deaths),
newdata = data.frame(proximity = quantile(death_rates$proximity, c(.10, .90), na.rm = TRUE)))
diff(pred_deaths)/pred_deaths[1]
death_rates %>%
ggplot(aes(x = proximity, y = covid_death_rate, size = total_deaths/20)) +
geom_point(alpha = .2) +
scale_size_continuous(guide = FALSE) +
geom_smooth(method = "lm", mapping = aes(weight = total_deaths/2), col = "black", size = 1.5) +
xlab("Occupational proximity risk") +
ylab("Occupational COVID death rate") +
ggtitle("Proximity risk and COVID death rate") +
theme_bw()
ggsave("output/proximity_objective_validation.png", width = 6, height = 5)
exposure <- read_csv("data/occupations_exposure_to_disease_data.csv") %>%
transmute(occ_title = `Occupation title`,
soc2010 = `UK SOC 2010 Code`,
proximity = `Proximity to others`,
proximity_binned = factor(cut(proximity, 3), labels = c("low","mid","high")))
high_risk <- exposure[order(exposure$proximity, decreasing = T),][1:10,]
low_risk <- exposure[order(exposure$proximity, decreasing = F),][1:10,]
out <- data.frame(Occupation = c(high_risk$occ_title, "...", low_risk$occ_title[nrow(low_risk):1]),
OccProxRisk = c(high_risk$proximity, "...",low_risk$proximity[nrow(low_risk):1]))
colnames(out) <- c("Occupation", "$OccProxRisk$")
library(xtable)
sink("output/risky_occupations.tex")
print.xtable(xtable(out), include.rownames = FALSE, floating = FALSE, sanitize.colnames.function = function(x) {x})
sink()
sessionInfo()
sink("output/risky_occupations.tex")
print.xtable(xtable(out), include.rownames = FALSE, floating = FALSE, sanitize.colnames.function = function(x) {x})
sink()
wfh_prox <- bes_reduced %>%
filter(wave == 20) %>%
group_by(soc2010) %>%
summarise(wfh = mean(workHomeCoronaSelf=="Yes", na.rm = TRUE)) %>%
full_join(exposure, wfh_prox, by = "soc2010") %>%
mutate(wfh_prox = proximity * (1-wfh))
high_risk <- wfh_prox[order(wfh_prox$wfh_prox, decreasing = T),][1:10,]
low_risk <- wfh_prox[order(wfh_prox$wfh_prox, decreasing = F),][1:10,]
(1-wfh_prox$wfh)*wfh_prox$proximity
wfh_prox %>%
ggplot(aes(x = proximity, y = wfh, label = occ_title, size = wfh*10)) +
geom_point(alpha = .2) +
geom_text() +
theme_bw() +
scale_size_continuous(guide = FALSE) +
xlab("Occupational proximity risk") +
ylab("Fraction working from home due to COVID") +
scale_size_identity(trans="sqrt",guide=FALSE) +
ggtitle("Occupational proximity risk and proportion working from home")
ggsave("output/proximity_by_wfh.png", width = 10, height = 7)
exposure <- read_csv("data/occupations_exposure_to_disease_data.csv") %>%
transmute(occ_title = `Occupation title`,
soc2010 = `UK SOC 2010 Code`,
proximity = `Proximity to others`,
proximity_binned = factor(cut(proximity, 3), labels = c("low","mid","high")))
occ_rates_min <- readstata13::read.dta13("data/LFS/ourmin_combined.dta")
obs_min <- readstata13::read.dta13("data/LFS/obsmin.dta")
obs_min <- tibble(obs_min) %>%
group_by(soc_minor) %>%
summarise(obs_aprjun18 = sum(obs_aprjun18),
obs_janmar19 = sum(obs_janmar19),
obs_octdec19 = sum(obs_octdec19),
obs_aprjun20 = sum(obs_aprjun20))
occ_rates_min <- tibble(full_join(occ_rates_min, obs_min, by = "soc_minor"))
to_plot <- exposure %>% mutate(soc_minor = substring(soc2010,1,3)) %>%
group_by(soc_minor) %>%
summarize(proximity = mean(proximity)) %>%
mutate(soc_minor = as.numeric(soc_minor)) %>%
full_join(occ_rates_min,
by = "soc_minor") %>%
mutate(our_diff = our_aprjun20 - our_octdec19,
n = (obs_octdec19 + obs_aprjun20)/2)
to_plot %>%
ggplot(aes(y = proximity, x = our_octdec19, size = sqrt(obs_octdec19), weight = obs_octdec19)) +
geom_point(col = alpha("black", .4)) +
theme_bw() +
ylab("Occupational proximity risk") +
xlab("Occupational unemployment rate") +
geom_smooth(method = "lm", col = "black") +
guides(size = "none") +
ylim(c(0,100))
ggsave(paste0("output/proximity_v_pre_crisis_oup.png"), width = 5, height = 4)
to_plot %>%
ggplot(aes(y = proximity, x = our_diff, size = sqrt(n), weight = n)) + geom_point(col = alpha("black", .4)) +
theme_bw() +
ylab("Occupational proximity risk") +
xlab("Change in occupational unemployment rate") +
geom_smooth(method = "lm", col = "black") +
guides(size = "none") +
ylim(c(0,100))
ggsave(paste0("output/proximity_v_change_in_oup.png"), width = 5, height = 4)
tibble(reshape2::melt(prop.table(table(bes_reduced$wave,bes_reduced$taxSpendSelf),1))) %>%
filter(!is.na(value)) %>%
mutate(wave = as.character(Var1),
Var2 = factor(Var2, levels = 1:11)) %>%
ggplot(aes(x=Var2, y = value, fill = wave)) +
geom_bar(position = "dodge", stat = "identity") +
theme_bw() +
ylab("Proportion of respondents") +
xlab("taxSpendSelf") +
scale_fill_manual("Wave", values = c(alpha("black", .2), alpha("black", .4), alpha("black", .6), "black"))
ggsave("output/taxSpendSelf_distribution_by_wave.png", width = 8, height = 4)
sd_est <- bes_reduced %>%
group_by(wave, proximity_binned) %>%
summarise(sd_taxSpendSelf = sd(taxSpendSelf, na.rm = TRUE),
.groups = "drop")
do_boot <- function(){
x <- bes_reduced %>%
slice_sample(prop = 1, replace = TRUE) %>%
group_by(wave, proximity_binned) %>%
summarise(sd_taxSpendSelf = sd(taxSpendSelf, na.rm = TRUE),
.groups = "drop")
x$sd_taxSpendSelf
}
boot_out <- replicate(1000, do_boot())
boot_low <- apply(boot_out,1,quantile, 0.025, na.rm = TRUE)
boot_hi <- apply(boot_out,1,quantile, 0.975, na.rm = TRUE)
sd_est$est <- sd_est$sd_taxSpendSelf
sd_est$lo <- boot_low
sd_est$hi <- boot_hi
sd_est %>%
mutate(date = case_when(wave == 14 ~ "2018-05-14",
wave == 15 ~ "2019-03-11",
wave == 16 ~ "2019-05-24",
wave == 17 ~ "2019-11-01",
wave == 18 ~ "2019-11-13",
wave == 19 ~ "2019-12-13",
wave == 20 ~ "2020-06-03"),
date = as.Date(date)) %>%
filter(!is.na(proximity_binned) & !is.na(sd_taxSpendSelf)) %>%
ggplot(aes(x = date, y = est,
ymin = lo, ymax = hi,
col = proximity_binned,
group = proximity_binned)) +
geom_pointrange(position = position_dodge(15)) + geom_line() +
theme_bw() +
xlab("Wave") +
ylab("sd(taxSpendSelf)") +
ylim(c(1,3)) +
scale_color_manual("Occupational proximity risk", values = c("gray", "black", "red")) +
scale_x_date(breaks = as.Date(c("2018-05-14", "2019-03-11", "2019-11-13", "2020-03-26", "2020-06-03")), date_labels = "%b-%Y") +
theme(legend.position = "bottom") +
geom_vline(xintercept = as.Date("2020-03-26"), linetype = 2)
ggsave("output/taxSpendSelf_polarization_by_wave_by_group.png", width = 8, height = 4)
dir.create(c("working", "output"))
?dir.create
paths <- c("working", "output")
apply(1:length(paths), function(i) dir.create(paths[i]))
paths <- c("working", "output")
apply(1:length(paths), function(i) dir.create(paths[i]))
paths <- c("working", "output")
sapply(1:length(paths), function(i) dir.create(paths[i]))
paths <- c("working", "output")
sapply(1:length(paths), function(i) dir.create(paths[i]))
source("01_prep_bes.R")
source("02_analysis.R")
sessionInfo()
